library(ggplot2)
# Retrieve the gapminder data.
# download.file() is part of base R, so this also works on machines without
# wget. The file.exists() guard skips the download when the file is already
# present (repeated wget calls would pile up gapminder.csv.1, .2, ... copies).
gap_url <- "http://hwheeler01.github.io/comp150/ggplot/gapminder.csv"
gap_file <- "gapminder.csv"
if (!file.exists(gap_file)) {
  download.file(gap_url, destfile = gap_file)
}
# Load the data.
# read.csv() treats only the double quote as a quoting character. The default
# of read.table() (quote = "\"'") also treats the apostrophe in a name such
# as "Cote d'Ivoire" as an opening quote, which silently merges rows.
# stringsAsFactors = TRUE keeps country/continent as factors on R >= 4.0.
gap <- read.csv(gap_file, stringsAsFactors = TRUE)
# NOTE(review): the recorded outputs below (1698 obs, 147 country levels)
# look like they were produced by the old read.table() call; with the rows
# parsed correctly the counts should differ -- re-run and refresh them.
# Use str() to find out more about the data.frame
str(gap)
## 'data.frame': 1698 obs. of 6 variables:
## $ country : Factor w/ 147 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Peek at the top of the table (head() returns six rows unless told otherwise)
head(gap, n = 6L)
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
# Ask for a specific number of rows to see more
head(gap, n = 20L)
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
## 7 Afghanistan Asia 1982 39.854 12881816 978.0114
## 8 Afghanistan Asia 1987 40.822 13867957 852.3959
## 9 Afghanistan Asia 1992 41.674 16317921 649.3414
## 10 Afghanistan Asia 1997 41.763 22227415 635.3414
## 11 Afghanistan Asia 2002 42.129 25268405 726.7341
## 12 Afghanistan Asia 2007 43.828 31889923 974.5803
## 13 Albania Europe 1952 55.230 1282697 1601.0561
## 14 Albania Europe 1957 59.280 1476505 1942.2842
## 15 Albania Europe 1962 64.820 1728137 2312.8890
## 16 Albania Europe 1967 66.220 1984060 2760.1969
## 17 Albania Europe 1972 67.690 2263554 3313.4222
## 18 Albania Europe 1977 68.930 2509048 3533.0039
## 19 Albania Europe 1982 70.420 2780097 3630.8807
## 20 Albania Europe 1987 72.000 3075321 3738.9327
# Look at the bottom of the table (six rows, the tail() default)
tail(gap, n = 6L)
## country continent year lifeExp pop gdpPercap
## 1693 Zimbabwe Africa 1982 60.363 7636524 788.8550
## 1694 Zimbabwe Africa 1987 62.351 9216418 706.1573
## 1695 Zimbabwe Africa 1992 60.377 10704340 693.4208
## 1696 Zimbabwe Africa 1997 46.809 11404948 792.4500
## 1697 Zimbabwe Africa 2002 39.989 11926563 672.0386
## 1698 Zimbabwe Africa 2007 43.487 12311143 469.7093
# Start with an empty canvas: data and axis mappings only, no layers yet
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp))

# aes() is short for "aesthetics"; it maps columns onto plot properties
# such as the x and y axes. Adding geom_point() draws one point per row.
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Put the x-axis on a log10 scale
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()

# Map continent to point color
ggplot(
  data = gap,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()

### Life expectancy over time, one color per continent
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point()

### Same plot, split into one panel per continent
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point() +
  facet_wrap(~continent)

### Let's remove Oceania and connect countries with lines
# this requires the package dplyr to filter
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep every row whose continent is not Oceania, then draw one line per
# country (group = country) inside each continent panel.
no_oceania <- gap %>%
  dplyr::filter(continent != "Oceania")
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent)

### Plot only a hand-picked set of countries
clist <- c("United States", "Mexico", "Canada")
subgap <- gap %>%
  dplyr::filter(country %in% clist)
# Lines show the trend; points mark the individual observations
ggplot(
  data = subgap,
  mapping = aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  geom_point()

### Boxplots: distribution of life expectancy within each continent
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot()

### Same boxplot with human-readable axis titles
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  labs(x = "Continent", y = "Life Expectancy (years)")

### Histograms: only an x aesthetic is needed, counts are computed for us
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

### Set the bin width explicitly (one year of life expectancy per bin)
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram(binwidth = 1)

### Density plots: a smoothed alternative to the histogram
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_density()

### One density curve per continent, distinguished by outline color
ggplot(data = gap, mapping = aes(x = lifeExp, color = continent)) +
  geom_density()

### Faceting works with any geom
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~continent)

# ncol = 1 stacks the panels in a single column
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1)

### Let's play with colors!
# (This heading used to be bare text, which R cannot parse; it must be a
# comment for the script to run.)
# scale_color_manual() assigns the listed colors to the continent levels
# that appear in no_oceania.
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_manual(values = c("red", "purple", "darkgreen", "orange"))

### Show every palette that RColorBrewer offers
library(RColorBrewer)
RColorBrewer::display.brewer.all()

### Use a ColorBrewer palette
# type    = "seq" (sequential), "div" (diverging) or "qual" (qualitative)
# palette = a palette name (see the list above), or a number that indexes
#           into the palettes of the chosen type
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(palette = "Dark2")

# Here the palette is picked by type and index instead of by name
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1) +
  scale_fill_brewer(type = "seq", palette = 1)

### Swap the background theme
# theme_bw() combined with the second diverging brewer palette
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(type = "div", palette = 2) +
  theme_bw()

# theme_classic() combined with the "PuOr" palette
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(palette = "PuOr") +
  theme_classic()

# ggthemes supplies extra themes together with matching color scales
library(ggthemes)
# The Economist look
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  theme_economist() +
  scale_color_economist()

# The Wall Street Journal look
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  theme_wsj() +
  scale_color_wsj()
